Valorant is a free-to-play first-person hero shooter developed and published by Riot Games, for Microsoft Windows. First teased under the codename Project A in October 2019, the game began a closed beta period with limited access on April 7, 2020, followed by an official release on June 2, 2020.
The weapons dataset is based on their first ever major tournament, Stage 2: "Masters" of the VCT (Valorant Champions Tour) 2021 which took place between 24th May and 30th May in Iceland.
The new dataset is taken directly from the game (patch 4.04).
# Kernel density estimate of the values in the 'Price' column of df_diff.
# `fill=True` is the current spelling of the deprecated `shade=True` keyword.
sns.kdeplot(data=df_diff['Price'], fill=True);
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

# Donut chart: how many weapons fall into each weapon type.
type_counts = Counter(df_new['Weapon Type'])
weapon_type = {'Weapon Type': list(type_counts.keys()), 'count': list(type_counts.values())}
fig_weapon = px.pie(weapon_type, values='count', names='Weapon Type', title='Weapon Type Distribution', hole=.5)
fig_weapon.show()

# Bar charts of the HDMG_0 (head) and BDMG_0 (body) damage columns.
headshot_dict = return_sorted('HDMG_0')
bodyshot_dict = return_sorted('BDMG_0')
fig_headshot = px.bar(headshot_dict, x='weapon', y='values', title='Weapon Headshot Distribution')
fig_headshot.show()
# Keep the bodyshot chart in its own variable so it does not overwrite the
# headshot figure.
fig_bodyshot = px.bar(bodyshot_dict, x='weapon', y='values', title='Weapon Bodyshot Distribution')
fig_bodyshot.show()
# Summary statistics (count, mean, std, quartiles) of the numeric columns.
df_new.describe()
| Price | Fire Rate | Magazine Capacity | Spread ADS | Spread HIP | HDMG_0 | BDMG_0 | LDMG_0 | HDMG_1 | BDMG_1 | LDMG_1 | HDMG_2 | BDMG_2 | LDMG_2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 17.000000 | 17.000000 | 17.000000 | 17.000000 | 17.000000 | 17.000000 | 17.000000 | 17.000000 | 17.000000 | 17.000000 | 17.000000 | 17.000000 | 17.000000 | 17.000000 |
| mean | 1629.411765 | 7.697647 | 21.235294 | 0.779824 | 1.173529 | 112.529412 | 43.352941 | 35.764706 | 107.647059 | 41.352941 | 34.176471 | 101.588235 | 39.294118 | 32.764706 |
| std | 1261.578726 | 4.913525 | 23.823647 | 1.114471 | 1.443749 | 65.352044 | 34.622863 | 27.673384 | 67.755942 | 35.862482 | 30.406075 | 70.336743 | 36.706206 | 30.918298 |
| min | 0.000000 | 0.600000 | 2.000000 | 0.000000 | 0.100000 | 24.000000 | 12.000000 | 10.000000 | 20.000000 | 8.000000 | 5.000000 | 9.000000 | 3.000000 | 2.000000 |
| 25% | 800.000000 | 3.500000 | 6.000000 | 0.157000 | 0.300000 | 72.000000 | 26.000000 | 22.000000 | 63.000000 | 22.000000 | 18.000000 | 63.000000 | 22.000000 | 18.000000 |
| 50% | 1600.000000 | 6.750000 | 13.000000 | 0.300000 | 0.450000 | 95.000000 | 30.000000 | 25.000000 | 88.000000 | 30.000000 | 25.000000 | 77.000000 | 28.000000 | 23.000000 |
| 75% | 2250.000000 | 12.000000 | 25.000000 | 0.790000 | 1.000000 | 159.000000 | 40.000000 | 34.000000 | 160.000000 | 40.000000 | 34.000000 | 145.000000 | 40.000000 | 34.000000 |
| max | 4700.000000 | 16.000000 | 100.000000 | 4.000000 | 5.000000 | 255.000000 | 150.000000 | 120.000000 | 255.000000 | 150.000000 | 127.000000 | 255.000000 | 150.000000 | 127.000000 |
# Index the weapons by name, keep the first ten columns and move the column
# at position 1 to the end of the frame.
df_new = df_new.set_index("Name")
df_anal = df_new.iloc[:, :10]
cols = list(df_anal.columns)
cols.append(cols.pop(1))
df_anal = df_anal[cols]
df_anal
| Weapon Type | Fire Rate | Wall Penetration | Magazine Capacity | Spread ADS | Spread HIP | HDMG_0 | BDMG_0 | LDMG_0 | Price | |
|---|---|---|---|---|---|---|---|---|---|---|
| Name | ||||||||||
| Classic | Sidearm | 6.75 | Low | 12 | 0.400 | 0.40 | 78 | 26 | 22 | 0 |
| Shorty | Sidearm | 3.33 | Low | 2 | 4.000 | 4.00 | 24 | 12 | 10 | 150 |
| Frenzy | Sidearm | 13.00 | Low | 13 | 0.450 | 0.45 | 78 | 26 | 22 | 450 |
| Ghost | Sidearm | 6.75 | Medium | 15 | 0.300 | 0.30 | 105 | 30 | 25 | 500 |
| Sheriff | Sidearm | 4.00 | High | 6 | 0.250 | 0.25 | 159 | 55 | 46 | 800 |
| Stinger | SMG | 16.00 | Low | 20 | 0.500 | 0.65 | 67 | 27 | 22 | 950 |
| Spectre | SMG | 13.33 | Medium | 30 | 0.250 | 0.40 | 78 | 26 | 22 | 1600 |
| Bulldog | Rifle | 10.00 | Medium | 24 | 0.300 | 0.30 | 115 | 35 | 29 | 2050 |
| Guardian | Rifle | 5.25 | Medium | 12 | 0.000 | 0.10 | 195 | 65 | 48 | 2250 |
| Phantom | Rifle | 11.00 | Medium | 30 | 0.110 | 0.20 | 156 | 39 | 33 | 2900 |
| Vandal | Rifle | 9.75 | Medium | 25 | 0.157 | 0.25 | 160 | 40 | 34 | 2900 |
| Marshall | Sniper | 1.50 | Medium | 5 | 0.000 | 1.00 | 202 | 101 | 85 | 950 |
| Operator | Sniper | 0.60 | High | 5 | 0.000 | 5.00 | 255 | 150 | 120 | 4700 |
| Bucky | Shotgun | 1.10 | Low | 5 | 2.600 | 2.60 | 40 | 20 | 19 | 850 |
| Judge | Shotgun | 3.50 | Medium | 7 | 2.250 | 2.25 | 34 | 17 | 14 | 1850 |
| Ares | Heavy | 13.00 | High | 50 | 0.900 | 1.00 | 72 | 30 | 25 | 1600 |
| Odin | Heavy | 12.00 | High | 100 | 0.790 | 0.80 | 95 | 38 | 32 | 3200 |
# Correlation matrix
# from https://www.kaggle.com/kerneler/starter-valorant-weapon-stats-f856dcf8-1
def plotCorrelationMatrix(df, graphWidth):
    """Plot the correlation matrix of the numeric columns of ``df``.

    Columns containing NaN, non-numeric columns and constant columns are
    discarded first. If fewer than two columns remain, a message is printed
    and nothing is plotted.

    Args:
        df: DataFrame to analyse.
        graphWidth: Width and height passed to ``figsize`` (square figure).

    Returns:
        None.
    """
    # `axis` must be passed by keyword: the positional form is deprecated and
    # rejected by recent pandas versions.
    df = df.dropna(axis='columns')
    # Correlation is only defined for numeric data; select those columns
    # explicitly instead of relying on `corr()` dropping the rest silently.
    df = df.select_dtypes(include=['number', 'bool'])
    # Constant columns have zero variance and therefore no correlation.
    df = df[[col for col in df if df[col].nunique() > 1]]
    if df.shape[1] < 2:
        print(f'No correlation plots shown: The number of non-NaN or constant columns ({df.shape[1]}) is less than 2')
        return
    corr = df.corr()
    plt.figure(num=None, figsize=(graphWidth, graphWidth), dpi=80, facecolor='w', edgecolor='k')
    corrMat = plt.matshow(corr, fignum=1)
    tick_positions = range(len(corr.columns))
    plt.xticks(tick_positions, corr.columns, rotation=90)
    plt.yticks(tick_positions, corr.columns)
    plt.gca().xaxis.tick_bottom()
    plt.colorbar(corrMat)
    plt.title('Correlation Matrix for the weapons dataframe', fontsize=15)
    plt.show()
# Correlation matrix of the weapons dataframe, drawn with figsize (8, 8).
plotCorrelationMatrix(df_new, 8)
C:\Users\anass\AppData\Local\Temp/ipykernel_82712/3485586321.py:5: FutureWarning: In a future version of pandas all arguments of DataFrame.dropna will be keyword-only.
import warnings
# Silence every warning from this point on (re-enabled later with
# filterwarnings('default')).
warnings.filterwarnings('ignore')
# Pairwise scatter plots of the analysis columns: first plain ...
sns.pairplot(df_anal);
# ... then coloured by weapon type.
sns.pairplot(df_anal,hue='Weapon Type');
What we can see here is that the damage values (head, body and leg) are correlated with each other. The price is somewhat correlated with the damage and the magazine capacity (with some outliers), and the fire rate is largely independent of the rest.
First we try with the categorical variables, then without them.
The dataset is small, so there isn't much to draw from it, but we are just playing around.
def get_X_y(columns, df=None, drop_first=False):
    """Build the feature matrix and the price target for a regression.

    Args:
        columns: Names of the feature columns to select.
        df: DataFrame to read from. Defaults to the module-level ``df_anal``.
        drop_first: Forwarded to ``pd.get_dummies``; when True the first level
            of every categorical column is dropped. Defaults to False, which
            keeps every level.

    Returns:
        Tuple ``(X, Y)``: ``X`` is the selected columns with categorical
        columns one-hot encoded, ``Y`` is the ``'Price'`` column.
    """
    source = df_anal if df is None else df
    X = pd.get_dummies(data=source[columns], drop_first=drop_first)
    Y = source['Price']
    return X, Y
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error as msr
from sklearn.preprocessing import MinMaxScaler
def _fit_price_model(X, Y, loga, drop_first, preprocessing):
    """Prepare the data, split it 70/30 and fit a linear regression.

    Returns ``(model, X, Y, X_train, X_test, y_train, y_test)`` where ``X`` and
    ``Y`` are the fully prepared (filtered / encoded / scaled / logged) data.
    """
    if loga:
        # log(price) is undefined for a price of 0, so rows with a
        # non-positive price are removed before taking the logarithm.
        positive = Y > 0
        X = X.loc[positive]
        Y = np.log(Y.loc[positive])
    # NOTE: `drop_first` only has an effect if X still contains categorical
    # columns; an X that is already one-hot encoded passes through unchanged.
    X = pd.get_dummies(data=X, drop_first=drop_first)
    # Boolean dummy columns are cast to float so that both scikit-learn and
    # statsmodels receive a purely numeric design matrix.
    X = X.astype(float)
    if preprocessing:
        # NOTE: the scaler is fitted on all rows (train and test together),
        # so the test split is not strictly unseen data.
        scaler = MinMaxScaler()
        X = pd.DataFrame(scaler.fit_transform(X), index=X.index, columns=X.columns)
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=101)
    model = LinearRegression()
    model.fit(X_train, y_train)
    return model, X, Y, X_train, X_test, y_train, y_test


def _mse_in_price_units(model, X, y, loga):
    """Mean squared error of ``model`` on ``(X, y)``, undoing the log if used."""
    predicted = model.predict(X)
    if loga:
        return msr(np.exp(predicted), np.exp(y))
    return msr(predicted, y)


def model_analysis(X, Y, loga=False, drop_first=False, preprocessing=True):
    """Fit a linear price model, plot it and print an OLS summary.

    Args:
        X: Feature DataFrame indexed like ``Y``.
        Y: Price Series.
        loga: If True, regress on ``log(price)``; rows with a non-positive
            price are dropped and errors are reported after ``exp``.
        drop_first: Forwarded to ``pd.get_dummies`` for any categorical
            column still present in ``X``.
        preprocessing: If True, min-max scale the features.

    Returns:
        Tuple ``(coeff_parameter, test_score, model_score, model_score2)``:
        the coefficient table, the MSE on the test split, the R² on all rows
        (in the fitted target space) and the MSE on all rows.
    """
    model, X, Y, X_train, X_test, y_train, y_test = _fit_price_model(
        X, Y, loga, drop_first, preprocessing
    )
    coeff_parameter = pd.DataFrame(model.coef_, X.columns, columns=['Coefficient'])
    predictions = model.predict(X_test)
    # Keyword arguments are required: positional x/y are rejected by current
    # seaborn releases.
    sns.regplot(x=y_test, y=predictions)
    X_train_Sm = sm.add_constant(X_train)
    ls = sm.OLS(y_train, X_train_Sm).fit()
    print(ls.summary())
    model_score = model.score(X, Y)
    test_score = _mse_in_price_units(model, X_test, y_test, loga)
    model_score2 = _mse_in_price_units(model, X, Y, loga)
    return coeff_parameter, test_score, model_score, model_score2


def model_analysis_no_plot(X, Y, loga=False, drop_first=False, preprocessing=True):
    """Fit the same model as ``model_analysis`` without plotting or printing.

    Args:
        X: Feature DataFrame indexed like ``Y``.
        Y: Price Series.
        loga: If True, regress on ``log(price)`` (non-positive prices dropped).
        drop_first: Forwarded to ``pd.get_dummies``.
        preprocessing: If True, min-max scale the features.

    Returns:
        Tuple ``(test_score, model_score, model)``: the MSE on the test split,
        the MSE on all rows and the fitted ``LinearRegression``.
    """
    model, X, Y, _, X_test, _, y_test = _fit_price_model(
        X, Y, loga, drop_first, preprocessing
    )
    test_score = _mse_in_price_units(model, X_test, y_test, loga)
    model_score = _mse_in_price_units(model, X, Y, loga)
    return test_score, model_score, model
# Feature set that includes the two categorical columns (Weapon Type and
# Wall Penetration); get_X_y one-hot encodes them.
columns=['Weapon Type', 'Wall Penetration','Fire Rate','Magazine Capacity','Spread ADS','Spread HIP','HDMG_0','BDMG_0','LDMG_0']
X,Y = get_X_y(columns)
X.head()
| Fire Rate | Magazine Capacity | Spread ADS | Spread HIP | HDMG_0 | BDMG_0 | LDMG_0 | Weapon Type_Heavy | Weapon Type_Rifle | Weapon Type_SMG | Weapon Type_Shotgun | Weapon Type_Sidearm | Weapon Type_Sniper | Wall Penetration_High | Wall Penetration_Low | Wall Penetration_Medium | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Name | ||||||||||||||||
| Classic | 6.75 | 12 | 0.40 | 0.40 | 78 | 26 | 22 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| Shorty | 3.33 | 2 | 4.00 | 4.00 | 24 | 12 | 10 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| Frenzy | 13.00 | 13 | 0.45 | 0.45 | 78 | 26 | 22 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 |
| Ghost | 6.75 | 15 | 0.30 | 0.30 | 105 | 30 | 25 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 |
| Sheriff | 4.00 | 6 | 0.25 | 0.25 | 159 | 55 | 46 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 |
Let's see some cases
# A full sweep over the three flags is left commented out; single
# configurations are run by hand instead.
#for preprocessing in True,False :
# for drop_first in True,False :
# for loga in True,False :
# Configuration: scaled features, log-price target.
preprocessing,drop_first,loga = True, False, True
print('preprocessing:',preprocessing ,"--",'drop_first:',drop_first ,"--",'loga:',loga)
results = model_analysis(X,Y,loga=loga,drop_first=drop_first,preprocessing = preprocessing)
# results is (coeff_parameter, test_score, model_score, model_score2)
#coeff_parameter,test_score,model_score,model_score2
coeff_parameter = results[0]
# Horizontal bar chart of the fitted coefficients with a reference line at 0.
coeff_parameter.plot(kind="barh", figsize=(9, 7))
plt.title("Coefficients plot,log:{},preprocessing:{},drop:{}".format(loga,preprocessing,drop_first))
plt.axvline(x=0, color=".5")
plt.subplots_adjust(left=0.3)
# results[1]: MSE on the test split; results[3]: MSE over all rows.
print("Model test error :", results[1])
print("Model total error :",results[3] )
preprocessing: True -- drop_first: False -- loga: True
OLS Regression Results
==============================================================================
Dep. Variable: Price R-squared: 1.000
Model: OLS Adj. R-squared: nan
Method: Least Squares F-statistic: nan
Date: Sat, 05 Mar 2022 Prob (F-statistic): nan
Time: 21:01:37 Log-Likelihood: 329.24
No. Observations: 11 AIC: -636.5
Df Residuals: 0 BIC: -632.1
Df Model: 10
Covariance Type: nonrobust
===========================================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------------------
const 5.2806 inf 0 nan nan nan
Fire Rate -0.9532 inf -0 nan nan nan
Magazine Capacity 1.2337 inf 0 nan nan nan
Spread ADS -2.0044 inf -0 nan nan nan
Spread HIP 0.3846 inf 0 nan nan nan
HDMG_0 2.0795 inf 0 nan nan nan
BDMG_0 -4.0065 inf -0 nan nan nan
LDMG_0 -0.1950 inf -0 nan nan nan
Weapon Type_Heavy 0.2138 inf 0 nan nan nan
Weapon Type_Rifle 1.3001 inf 0 nan nan nan
Weapon Type_SMG 1.2093 inf 0 nan nan nan
Weapon Type_Shotgun 0 nan nan nan nan nan
Weapon Type_Sidearm 0.2272 inf 0 nan nan nan
Weapon Type_Sniper 2.3302 inf 0 nan nan nan
Wall Penetration_High 2.5440 inf 0 nan nan nan
Wall Penetration_Low 1.3701 inf 0 nan nan nan
Wall Penetration_Medium 1.3665 inf 0 nan nan nan
==============================================================================
Omnibus: 20.841 Durbin-Watson: 2.338
Prob(Omnibus): 0.000 Jarque-Bera (JB): 14.900
Skew: -2.161 Prob(JB): 0.000582
Kurtosis: 6.718 Cond. No. 347.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
Model test error : 1993498.7978286124
Model total error : 622968.3743214415
The price is positively correlated with the head damage, but it is weird that the coefficient is negative for the body damage. The spread coefficient is negative as expected (HIP is less influential), and the fire rate has a negative coefficient too. The category has a considerable effect too.
# Configuration: unscaled features, drop_first requested, log-price target.
preprocessing,drop_first,loga = False, True, True
print('preprocessing:',preprocessing ,"--",'drop_first:',drop_first ,"--",'loga:',loga)
results = model_analysis(X,Y,loga=loga,drop_first=drop_first,preprocessing = preprocessing)
# results is (coeff_parameter, test_score, model_score, model_score2)
#coeff_parameter,test_score,model_score,model_score2
coeff_parameter = results[0]
# Horizontal bar chart of the fitted coefficients with a reference line at 0.
coeff_parameter.plot(kind="barh", figsize=(9, 7))
plt.title("Coefficients plot,log:{},preprocessing:{},drop:{}".format(loga,preprocessing,drop_first))
plt.axvline(x=0, color=".5")
plt.subplots_adjust(left=0.3)
# results[1]: MSE on the test split; results[3]: MSE over all rows.
print("Model test error :", results[1])
print("Model total error :",results[3] )
preprocessing: False -- drop_first: True -- loga: True
OLS Regression Results
==============================================================================
Dep. Variable: Price R-squared: 1.000
Model: OLS Adj. R-squared: nan
Method: Least Squares F-statistic: nan
Date: Sat, 05 Mar 2022 Prob (F-statistic): nan
Time: 21:01:37 Log-Likelihood: 313.90
No. Observations: 11 AIC: -605.8
Df Residuals: 0 BIC: -601.4
Df Model: 10
Covariance Type: nonrobust
===========================================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------------------
const 1.4781 inf 0 nan nan nan
Fire Rate 0.1336 inf 0 nan nan nan
Magazine Capacity -0.0020 inf -0 nan nan nan
Spread ADS 1.6953 inf 0 nan nan nan
Spread HIP -1.2679 inf -0 nan nan nan
HDMG_0 -0.0057 inf -0 nan nan nan
BDMG_0 -0.1538 inf -0 nan nan nan
LDMG_0 0.3172 inf 0 nan nan nan
Weapon Type_Heavy 0.8321 inf 0 nan nan nan
Weapon Type_Rifle 0.5062 inf 0 nan nan nan
Weapon Type_SMG 0.6632 inf 0 nan nan nan
Weapon Type_Shotgun 0 nan nan nan nan nan
Weapon Type_Sidearm 0.0426 inf 0 nan nan nan
Weapon Type_Sniper -0.5661 inf -0 nan nan nan
Wall Penetration_High 0.2660 inf 0 nan nan nan
Wall Penetration_Low 0.1493 inf 0 nan nan nan
Wall Penetration_Medium 1.0628 inf 0 nan nan nan
==============================================================================
Omnibus: 0.832 Durbin-Watson: 0.648
Prob(Omnibus): 0.660 Jarque-Bera (JB): 0.707
Skew: 0.490 Prob(JB): 0.702
Kurtosis: 2.238 Cond. No. 4.82e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
[3] The condition number is large, 4.82e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
Model test error : 4034693.0969555243
Model total error : 1260841.5927986014
Without scaling, the categorical coefficients carry relatively more weight, which is expected (it balances out the previous case).
# Configuration: scaled features, raw (non-log) price target.
preprocessing,drop_first,loga = True, False, False
print('preprocessing:',preprocessing ,"--",'drop_first:',drop_first ,"--",'loga:',loga)
results = model_analysis(X,Y,loga=loga,drop_first=drop_first,preprocessing = preprocessing)
# results is (coeff_parameter, test_score, model_score, model_score2)
#coeff_parameter,test_score,model_score,model_score2
coeff_parameter = results[0]
# Horizontal bar chart of the fitted coefficients with a reference line at 0.
coeff_parameter.plot(kind="barh", figsize=(9, 7))
plt.title("Coefficients plot,log:{},preprocessing:{},drop:{}".format(loga,preprocessing,drop_first))
plt.axvline(x=0, color=".5")
plt.subplots_adjust(left=0.3)
# results[1]: MSE on the test split; results[3]: MSE over all rows.
print("Model test error :", results[1])
print("Model total error :",results[3])
preprocessing: True -- drop_first: False -- loga: False
OLS Regression Results
==============================================================================
Dep. Variable: Price R-squared: 1.000
Model: OLS Adj. R-squared: nan
Method: Least Squares F-statistic: nan
Date: Sat, 05 Mar 2022 Prob (F-statistic): nan
Time: 21:01:38 Log-Likelihood: 272.47
No. Observations: 11 AIC: -522.9
Df Residuals: 0 BIC: -518.6
Df Model: 10
Covariance Type: nonrobust
===========================================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------------------
const -434.6740 inf -0 nan nan nan
Fire Rate 1382.6125 inf 0 nan nan nan
Magazine Capacity -185.6431 inf -0 nan nan nan
Spread ADS 806.5477 inf 0 nan nan nan
Spread HIP 3563.2003 inf 0 nan nan nan
HDMG_0 6051.7006 inf 0 nan nan nan
BDMG_0 -4724.2348 inf -0 nan nan nan
LDMG_0 -1500.4482 inf -0 nan nan nan
Weapon Type_Heavy 205.0651 inf 0 nan nan nan
Weapon Type_Rifle 573.9149 inf 0 nan nan nan
Weapon Type_SMG 339.4114 inf 0 nan nan nan
Weapon Type_Shotgun 0 nan nan nan nan nan
Weapon Type_Sidearm -2049.6501 inf -0 nan nan nan
Weapon Type_Sniper 496.5848 inf 0 nan nan nan
Wall Penetration_High 1253.5542 inf 0 nan nan nan
Wall Penetration_Low -1253.3397 inf -0 nan nan nan
Wall Penetration_Medium -434.8885 inf -0 nan nan nan
==============================================================================
Omnibus: 1.666 Durbin-Watson: 0.846
Prob(Omnibus): 0.435 Jarque-Bera (JB): 1.096
Skew: -0.716 Prob(JB): 0.578
Kurtosis: 2.416 Cond. No. 239.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The input rank is higher than the number of observations.
Model test error : 2331658.5946693993
Model total error : 822938.3275303763
Now without the categories
# Numeric-only feature set: the two categorical columns are left out.
columns=['Fire Rate','Magazine Capacity','Spread ADS','Spread HIP','HDMG_0','BDMG_0','LDMG_0']
X,Y = get_X_y(columns)
X.head()
| Fire Rate | Magazine Capacity | Spread ADS | Spread HIP | HDMG_0 | BDMG_0 | LDMG_0 | |
|---|---|---|---|---|---|---|---|
| Name | |||||||
| Classic | 6.75 | 12 | 0.40 | 0.40 | 78 | 26 | 22 |
| Shorty | 3.33 | 2 | 4.00 | 4.00 | 24 | 12 | 10 |
| Frenzy | 13.00 | 13 | 0.45 | 0.45 | 78 | 26 | 22 |
| Ghost | 6.75 | 15 | 0.30 | 0.30 | 105 | 30 | 25 |
| Sheriff | 4.00 | 6 | 0.25 | 0.25 | 159 | 55 | 46 |
# Configuration: scaled numeric features, log-price target.
preprocessing,drop_first,loga = True, True, True
# drop_first has no effect here: X contains no categorical columns.
#the drop_first doesnt matter here
print('preprocessing:',preprocessing ,"--",'drop_first:',drop_first ,"--",'loga:',loga)
results = model_analysis(X,Y,loga=loga,drop_first=drop_first,preprocessing = preprocessing)
# results is (coeff_parameter, test_score, model_score, model_score2)
#coeff_parameter,test_score,model_score,model_score2
coeff_parameter = results[0]
# Horizontal bar chart of the fitted coefficients with a reference line at 0.
coeff_parameter.plot(kind="barh", figsize=(9, 7))
plt.title("Coefficients plot,log:{},preprocessing:{},drop:{}".format(loga,preprocessing,drop_first))
plt.axvline(x=0, color=".5")
plt.subplots_adjust(left=0.3)
# results[1]: MSE on the test split; results[3]: MSE over all rows.
print("Model test error :", results[1])
print("Model total error :",results[3])
preprocessing: True -- drop_first: True -- loga: True
OLS Regression Results
==============================================================================
Dep. Variable: Price R-squared: 0.950
Model: OLS Adj. R-squared: 0.833
Method: Least Squares F-statistic: 8.105
Date: Sat, 05 Mar 2022 Prob (F-statistic): 0.0567
Time: 21:01:38 Log-Likelihood: 1.3805
No. Observations: 11 AIC: 13.24
Df Residuals: 3 BIC: 16.42
Df Model: 7
Covariance Type: nonrobust
=====================================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------------
const 7.3929 2.646 2.794 0.068 -1.029 15.815
Fire Rate -0.6564 2.074 -0.317 0.772 -7.257 5.944
Magazine Capacity 2.9882 1.437 2.080 0.129 -1.584 7.560
Spread ADS -9.3208 7.878 -1.183 0.322 -34.392 15.750
Spread HIP 8.8874 8.215 1.082 0.359 -17.256 35.031
HDMG_0 6.1060 4.053 1.506 0.229 -6.793 19.005
BDMG_0 15.4863 23.029 0.672 0.549 -57.801 88.773
LDMG_0 -29.5039 34.773 -0.848 0.459 -140.168 81.160
==============================================================================
Omnibus: 1.388 Durbin-Watson: 2.748
Prob(Omnibus): 0.499 Jarque-Bera (JB): 0.081
Skew: -0.127 Prob(JB): 0.960
Kurtosis: 3.337 Cond. No. 467.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Model test error : 979903.678594393
Model total error : 375070.11838569085
It's weird that the leg damage has a relatively big coefficient (in absolute value), and that the HIP spread has a positive coefficient. Note that while the error is big, the performance of the model is better than without the spread variables.
# Configuration: unscaled numeric features, raw (non-log) price target.
preprocessing,drop_first,loga = False, True, False
# drop_first has no effect here: X contains no categorical columns.
#the drop_first doesnt matter here
print('preprocessing:',preprocessing ,"--",'drop_first:',drop_first ,"--",'loga:',loga)
results = model_analysis(X,Y,loga=loga,drop_first=drop_first,preprocessing = preprocessing)
# results is (coeff_parameter, test_score, model_score, model_score2)
#coeff_parameter,test_score,model_score,model_score2
coeff_parameter = results[0]
# Horizontal bar chart of the fitted coefficients with a reference line at 0.
coeff_parameter.plot(kind="barh", figsize=(9, 7))
plt.title("Coefficients plot,log:{},preprocessing:{},drop:{}".format(loga,preprocessing,drop_first))
plt.axvline(x=0, color=".5")
plt.subplots_adjust(left=0.3)
# results[1]: MSE on the test split; results[3]: MSE over all rows.
print("Model test error :", results[1])
print("Model total error :",results[3])
preprocessing: False -- drop_first: True -- loga: False
OLS Regression Results
==============================================================================
Dep. Variable: Price R-squared: 0.985
Model: OLS Adj. R-squared: 0.949
Method: Least Squares F-statistic: 27.43
Date: Sat, 05 Mar 2022 Prob (F-statistic): 0.0101
Time: 21:01:38 Log-Likelihood: -71.227
No. Observations: 11 AIC: 158.5
Df Residuals: 3 BIC: 161.6
Df Model: 7
Covariance Type: nonrobust
=====================================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------------
const -885.7179 1784.363 -0.496 0.654 -6564.359 4792.923
Fire Rate 39.3979 79.660 0.495 0.655 -214.115 292.911
Magazine Capacity 28.1403 4.700 5.987 0.009 13.183 43.098
Spread ADS -1013.4148 532.605 -1.903 0.153 -2708.400 681.571
Spread HIP 1210.3934 245.350 4.933 0.016 429.581 1991.206
HDMG_0 29.8647 5.852 5.103 0.015 11.240 48.489
BDMG_0 -18.3923 44.543 -0.413 0.707 -160.149 123.364
LDMG_0 -45.5457 51.049 -0.892 0.438 -208.007 116.916
==============================================================================
Omnibus: 2.506 Durbin-Watson: 2.201
Prob(Omnibus): 0.286 Jarque-Bera (JB): 0.330
Skew: 0.021 Prob(JB): 0.848
Kurtosis: 3.848 Cond. No. 3.56e+03
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 3.56e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
Model test error : 1013268.5170922922
Model total error : 373573.136709475
# Compare the errors of every combination of the four switches
# (preprocessing, drop_first, loga, category). Keys are the stringified
# 4-tuples of flags.
error_total = {}
error_test = {}
flags = (True, False)
grid = [(p, d, l, c) for p in flags for d in flags for l in flags for c in flags]
for preprocessing, drop_first, loga, category in grid:
    columns = ['Fire Rate', 'Magazine Capacity', 'HDMG_0', 'BDMG_0', 'LDMG_0']
    if category:
        # Prepend the two categorical columns.
        columns = ['Weapon Type', 'Wall Penetration'] + columns
    X, Y = get_X_y(columns)
    results = model_analysis_no_plot(X, Y, loga=loga, drop_first=drop_first, preprocessing=preprocessing)
    key = str((preprocessing, drop_first, loga, category))
    error_total[key] = results[1]
    error_test[key] = results[0]

# Error over all rows, one bar per configuration.
sns.barplot(x=list(error_total), y=list(error_total.values()));
plt.xticks(rotation=90)
plt.tight_layout()

# Error on the test split, one bar per configuration.
sns.barplot(x=list(error_test), y=list(error_test.values()));
plt.xticks(rotation=90)
plt.tight_layout()

# Same plots without the four outlying configurations
# (raw price target with categorical features).
outliers = [
    (True, True, False, True),
    (True, False, False, True),
    (False, True, False, True),
    (False, False, False, True),
]
for outlier in outliers:
    key = str(outlier)
    del error_total[key]
    del error_test[key]

sns.barplot(x=list(error_total), y=list(error_total.values()));
plt.xticks(rotation=90)
plt.tight_layout()

sns.barplot(x=list(error_test), y=list(error_test.values()));
plt.xticks(rotation=90)
plt.tight_layout()
All the errors are huge; this is not something to model with linear regression. Also, other variables aren't taken into consideration, like the spray.
# Configuration (preprocessing=True, drop_first=False, loga=True, no
# categorical columns), without the spread columns.
#The T,F,T,F example
columns=['Fire Rate','Magazine Capacity','HDMG_0','BDMG_0','LDMG_0']
X,Y = get_X_y(columns)
preprocessing,drop_first,loga = True, False, True
results = model_analysis(X,Y,loga=loga,drop_first=drop_first,preprocessing = preprocessing)
coeff_parameter = results[0]
# Horizontal bar chart of the fitted coefficients with a reference line at 0.
coeff_parameter.plot(kind="barh", figsize=(9, 7))
plt.title("Coefficients plot")
plt.axvline(x=0, color=".5")
plt.subplots_adjust(left=0.3)
# results[1]: MSE on the test split; results[3]: MSE over all rows.
print("Model test error :", results[1])
print("Model total error :",results[3] )
# Refit with the no-plot variant purely to obtain the fitted model object
# (third element of its return tuple).
model = model_analysis_no_plot(X,Y,loga=loga,drop_first=drop_first,preprocessing = preprocessing)[2]
OLS Regression Results
==============================================================================
Dep. Variable: Price R-squared: 0.925
Model: OLS Adj. R-squared: 0.851
Method: Least Squares F-statistic: 12.41
Date: Sat, 05 Mar 2022 Prob (F-statistic): 0.00760
Time: 21:01:40 Log-Likelihood: -0.79261
No. Observations: 11 AIC: 13.59
Df Residuals: 5 BIC: 15.97
Df Model: 5
Covariance Type: nonrobust
=====================================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------------
const 4.8563 0.457 10.637 0.000 3.683 6.030
Fire Rate 1.0460 0.597 1.751 0.140 -0.490 2.582
Magazine Capacity 1.4926 0.506 2.948 0.032 0.191 2.794
HDMG_0 3.5055 0.871 4.025 0.010 1.267 5.744
BDMG_0 -4.4123 9.737 -0.453 0.669 -29.443 20.618
LDMG_0 4.4560 9.532 0.467 0.660 -20.046 28.958
==============================================================================
Omnibus: 3.527 Durbin-Watson: 2.492
Prob(Omnibus): 0.171 Jarque-Bera (JB): 1.004
Skew: -0.655 Prob(JB): 0.605
Kurtosis: 3.690 Cond. No. 154.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Model test error : 1273102.7250278767
Model total error : 486564.3119100869
# Rebuild the scaled feature matrix outside the model function. The first row
# is skipped with [1:]; this mirrors the 'Classic' row that model_analysis
# drops when loga=True (assumes 'Classic' is the first row — see the table).
scaler = MinMaxScaler()
X_tr=scaler.fit_transform(X[1:])
X=pd.DataFrame(X_tr, index=X[1:].index, columns=X[1:].columns)
# The model predicts log(price); exp() converts back to price units.
predicted = pd.Series(np.exp(model.predict(X)), index=Y[1:].index)
# astype(int) truncates the predicted prices to whole numbers.
predicted = predicted.astype(int)
# Side-by-side table of predicted and actual prices.
comparaison = predicted.to_frame(name="predicted")
comparaison["Actual value"] = Y[1:]
comparaison
| predicted | Actual value | |
|---|---|---|
| Name | ||
| Shorty | 154 | 150 |
| Frenzy | 832 | 450 |
| Ghost | 839 | 500 |
| Sheriff | 1451 | 800 |
| Stinger | 930 | 950 |
| Spectre | 1102 | 1600 |
| Bulldog | 1401 | 2050 |
| Guardian | 2354 | 2250 |
| Phantom | 3167 | 2900 |
| Vandal | 2889 | 2900 |
| Marshall | 2583 | 950 |
| Operator | 4680 | 4700 |
| Bucky | 197 | 850 |
| Judge | 197 | 1850 |
| Ares | 1326 | 1600 |
| Odin | 3868 | 3200 |
# Same configuration as the previous example, this time including the two
# spread columns.
#The T,F,T,F example with the spread
columns=['Fire Rate','Magazine Capacity','Spread ADS','Spread HIP','HDMG_0','BDMG_0','LDMG_0']
X,Y = get_X_y(columns)
preprocessing,drop_first,loga = True, False, True
results = model_analysis(X,Y,loga=loga,drop_first=drop_first,preprocessing = preprocessing)
coeff_parameter = results[0]
# Horizontal bar chart of the fitted coefficients with a reference line at 0.
coeff_parameter.plot(kind="barh", figsize=(9, 7))
plt.title("Coefficients plot")
plt.axvline(x=0, color=".5")
plt.subplots_adjust(left=0.3)
# results[1]: MSE on the test split; results[3]: MSE over all rows.
print("Model test error :", results[1])
print("Model total error :",results[3] )
# Refit with the no-plot variant purely to obtain the fitted model object
# (third element of its return tuple).
model = model_analysis_no_plot(X,Y,loga=loga,drop_first=drop_first,preprocessing = preprocessing)[2]
OLS Regression Results
==============================================================================
Dep. Variable: Price R-squared: 0.950
Model: OLS Adj. R-squared: 0.833
Method: Least Squares F-statistic: 8.105
Date: Sat, 05 Mar 2022 Prob (F-statistic): 0.0567
Time: 21:01:40 Log-Likelihood: 1.3805
No. Observations: 11 AIC: 13.24
Df Residuals: 3 BIC: 16.42
Df Model: 7
Covariance Type: nonrobust
=====================================================================================
coef std err t P>|t| [0.025 0.975]
-------------------------------------------------------------------------------------
const 7.3929 2.646 2.794 0.068 -1.029 15.815
Fire Rate -0.6564 2.074 -0.317 0.772 -7.257 5.944
Magazine Capacity 2.9882 1.437 2.080 0.129 -1.584 7.560
Spread ADS -9.3208 7.878 -1.183 0.322 -34.392 15.750
Spread HIP 8.8874 8.215 1.082 0.359 -17.256 35.031
HDMG_0 6.1060 4.053 1.506 0.229 -6.793 19.005
BDMG_0 15.4863 23.029 0.672 0.549 -57.801 88.773
LDMG_0 -29.5039 34.773 -0.848 0.459 -140.168 81.160
==============================================================================
Omnibus: 1.388 Durbin-Watson: 2.748
Prob(Omnibus): 0.499 Jarque-Bera (JB): 0.081
Skew: -0.127 Prob(JB): 0.960
Kurtosis: 3.337 Cond. No. 467.
==============================================================================
Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
Model test error : 979903.678594393
Model total error : 375070.11838569085
# Rebuild the scaled feature matrix outside the model function. The first row
# is skipped with [1:]; this mirrors the 'Classic' row that model_analysis
# drops when loga=True (assumes 'Classic' is the first row — see the table).
scaler = MinMaxScaler()
X_tr=scaler.fit_transform(X[1:])
X=pd.DataFrame(X_tr, index=X[1:].index, columns=X[1:].columns)
# The model predicts log(price); exp() converts back to price units.
predicted = pd.Series(np.exp(model.predict(X)), index=Y[1:].index)
# astype(int) truncates the predicted prices to whole numbers.
predicted = predicted.astype(int)
# Side-by-side table of predicted and actual prices.
comparaison = predicted.to_frame(name="predicted")
comparaison["Actual value"] = Y[1:]
comparaison
| predicted | Actual value | |
|---|---|---|
| Name | ||
| Shorty | 152 | 150 |
| Frenzy | 710 | 450 |
| Ghost | 1523 | 500 |
| Sheriff | 329 | 800 |
| Stinger | 828 | 950 |
| Spectre | 1712 | 1600 |
| Bulldog | 1362 | 2050 |
| Guardian | 2380 | 2250 |
| Phantom | 3224 | 2900 |
| Vandal | 2725 | 2900 |
| Marshall | 38 | 950 |
| Operator | 4723 | 4700 |
| Bucky | 127 | 850 |
| Judge | 340 | 1850 |
| Ares | 1247 | 1600 |
| Odin | 3705 | 3200 |
# Show the configuration of the MinMaxScaler used above.
scaler.get_params()
{'clip': False, 'copy': True, 'feature_range': (0, 1)}
# Write the fitted log-linear model out as a formula: one
# "<coef>*scaler(<column>)+" term per feature (coefficients rounded to two
# decimals), followed by the intercept.
terms = [
    str(round(coef, 2)) + '*scaler(' + name + ')+'
    for coef, name in zip(model.coef_, columns)
]
equation = ['Price=', 'exp(', *terms, str(model.intercept_) + ')']
print(''.join(equation))
Price=exp(-0.66*scaler(Fire Rate)+2.99*scaler(Magazine Capacity)+-9.32*scaler(Spread ADS)+8.89*scaler(Spread HIP)+6.11*scaler(HDMG_0)+15.49*scaler(BDMG_0)+-29.5*scaler(LDMG_0)+7.3929436707212215)
# Restore the default warning behaviour after the modelling section.
warnings.filterwarnings('default')
The dataset used here comes from Kaggle, where it was collected from vlr.gg.
There are four tables. The top level is Matches, which tells you the teams playing and the match (map) score. Games is the next level, which breaks down the specific maps played. Then GameRounds gives a round-by-round breakdown showing who won, the economy of each team, the win type and the buy type, whenever the info is available. The game rounds are packaged in one string that you should be able to cast as JSON. Lastly there is GameScoreboard, which gives you the player performance, as well as things like the number of first kills, first deaths, 2Ks, 3Ks, One v Ones, One v Twos, etc.
This content introduction was written by Joshua Broas on Kaggle.
warnings.filterwarnings('ignore')
import sqlite3
import pandas as pd
import sqlalchemy
from contextlib import closing

# Location of the SQLite database that holds the match data.
DB_PATH = r"C:\Users\anass\Programmation\EDA\Valorant\valorant.sqlite"

# List the tables stored in the database. closing() releases the cursor and
# the connection even if the query raises.
with closing(sqlite3.connect(DB_PATH)) as con, closing(con.cursor()) as cursor:
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables_names = [name[0] for name in cursor.fetchall()]

# Load every table into its own DataFrame, in the order of `tables_names`.
# The table name is double-quoted (with embedded quotes doubled) so that names
# containing spaces or reserved words still produce valid SQL.
# NOTE(review): `cnx` is intentionally left open, as before, in case later
# cells query through it; call cnx.close() once it is no longer needed.
cnx = sqlite3.connect(DB_PATH)
list_of_dataframes = [
    pd.read_sql_query('SELECT * FROM "{}"'.format(name.replace('"', '""')), cnx)
    for name in tables_names
]
tables_names
['Matches', 'Games', 'Game_Rounds', 'Game_Scoreboard']
# Bind each loaded table to its own name; the list must hold exactly four
# DataFrames, in this order. The temporary list is dropped afterwards.
df_matches, df_games, df_rounds, df_scoreboard = list_of_dataframes
del list_of_dataframes
df_matches.head()
| MatchID | Date | Patch | EventID | EventName | EventStage | Team1ID | Team2ID | Team1 | Team2 | Team1_MapScore | Team2_MapScore | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 62393 | 2022-01-08 15:30:00 | Patch 3.12 | 826 | Nerd Street Gamers Winter Championship - Regio... | Group Stage: Decider (A) | 6903 | 6020 | Booster Seat Gaming | Pho Real | 2 | 1 |
| 1 | 62403 | 2022-01-08 15:30:00 | Patch 3.12 | 826 | Nerd Street Gamers Winter Championship - Regio... | Group Stage: Decider (C) | 7046 | 7047 | Bjor's Kittens | Mugiwara | 2 | 0 |
| 2 | 62391 | 2022-01-08 12:30:00 | Patch 3.12 | 826 | Nerd Street Gamers Winter Championship - Regio... | Group Stage: Winner's (A) | 6461 | 6903 | Akrew | Booster Seat Gaming | 2 | 1 |
| 3 | 62396 | 2022-01-08 12:30:00 | Patch 3.12 | 826 | Nerd Street Gamers Winter Championship - Regio... | Group Stage: Winner's (B) | 6164 | 7043 | Radiance | sameROFLMAO | 2 | 0 |
| 4 | 62401 | 2022-01-08 12:30:00 | Patch 3.12 | 826 | Nerd Street Gamers Winter Championship - Regio... | Group Stage: Winner's (C) | 7045 | 7046 | Salt and Vinegar | Bjor's Kittens | 2 | 0 |
df_games.head()
| GameID | MatchID | Map | Team1ID | Team2ID | Team1 | Team2 | Winner | Team1_TotalRounds | Team2_TotalRounds | ... | Team1_FullBuyWon | Team2_PistolWon | Team2_Eco | Team2_EcoWon | Team2_SemiEco | Team2_SemiEcoWon | Team2_SemiBuy | Team2_SemiBuyWon | Team2_FullBuy | Team2_FullBuyWon | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60894 | 62393 | Breeze | 6903 | 6020 | Booster Seat Gaming | Pho Real | Booster Seat Gaming | 13 | 7 | ... | 8.0 | 0.0 | 4.0 | 0.0 | 2.0 | 0.0 | 4.0 | 1.0 | 10.0 | 6.0 |
| 1 | 60895 | 62393 | Bind | 6903 | 6020 | Booster Seat Gaming | Pho Real | Pho Real | 2 | 13 | ... | 1.0 | 2.0 | 2.0 | 2.0 | 0.0 | 0.0 | 4.0 | 3.0 | 9.0 | 8.0 |
| 2 | 60896 | 62393 | Haven | 6903 | 6020 | Booster Seat Gaming | Pho Real | Booster Seat Gaming | 13 | 8 | ... | 9.0 | 1.0 | 2.0 | 1.0 | 2.0 | 0.0 | 6.0 | 2.0 | 11.0 | 5.0 |
| 3 | 60924 | 62403 | Icebox | 7046 | 7047 | Bjor's Kittens | Mugiwara | Bjor's Kittens | 13 | 6 | ... | 8.0 | 0.0 | 4.0 | 0.0 | 1.0 | 0.0 | 2.0 | 1.0 | 12.0 | 5.0 |
| 4 | 60925 | 62403 | Haven | 7046 | 7047 | Bjor's Kittens | Mugiwara | Bjor's Kittens | 13 | 9 | ... | 11.0 | 1.0 | 3.0 | 2.0 | 3.0 | 0.0 | 4.0 | 3.0 | 12.0 | 4.0 |
5 rows × 36 columns
df_rounds.head()
| GameID | Team1ID | Team2ID | RoundHistory | |
|---|---|---|---|---|
| 0 | 60894 | 6903 | 6020 | {1: {'RoundWinner': 'BOOS', 'ScoreAfterRound':... |
| 1 | 60895 | 6903 | 6020 | {1: {'RoundWinner': 'PHO ', 'ScoreAfterRound':... |
| 2 | 60896 | 6903 | 6020 | {1: {'RoundWinner': 'PHO ', 'ScoreAfterRound':... |
| 3 | 60924 | 7046 | 7047 | {1: {'RoundWinner': 'BJOR', 'ScoreAfterRound':... |
| 4 | 60925 | 7046 | 7047 | {1: {'RoundWinner': 'BJOR', 'ScoreAfterRound':... |
df_scoreboard.head()
| GameID | PlayerID | PlayerName | TeamAbbreviation | Agent | ACS | Kills | Deaths | Assists | PlusMinus | ... | Num_4Ks | Num_5Ks | OnevOne | OnevTwo | OnevThree | OnevFour | OnevFive | Econ | Plants | Defuses | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60894 | 8419 | Reduxx | Boos | jett | 313.0 | 24.0 | 10.0 | 3.0 | 14.0 | ... | 2.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 74.0 | 0.0 | 0.0 |
| 1 | 60894 | 466 | ChurmZ | Boos | chamber | 227.0 | 16.0 | 10.0 | 7.0 | 6.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 67.0 | 2.0 | 0.0 |
| 2 | 60894 | 3712 | diaamond | Boos | sova | 226.0 | 17.0 | 9.0 | 8.0 | 8.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 58.0 | 3.0 | 0.0 |
| 3 | 60894 | 5099 | Boltzy | Boos | viper | 218.0 | 17.0 | 12.0 | 2.0 | 5.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 48.0 | 0.0 | 0.0 |
| 4 | 60894 | 3983 | Virtyy | Boos | skye | 80.0 | 5.0 | 13.0 | 3.0 | -8.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 21.0 | 0.0 | 0.0 |
5 rows × 28 columns
df_games.head()
| GameID | MatchID | Map | Team1ID | Team2ID | Team1 | Team2 | Winner | Team1_TotalRounds | Team2_TotalRounds | ... | Team1_FullBuyWon | Team2_PistolWon | Team2_Eco | Team2_EcoWon | Team2_SemiEco | Team2_SemiEcoWon | Team2_SemiBuy | Team2_SemiBuyWon | Team2_FullBuy | Team2_FullBuyWon | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60894 | 62393 | Breeze | 6903 | 6020 | Booster Seat Gaming | Pho Real | Booster Seat Gaming | 13 | 7 | ... | 8.0 | 0.0 | 4.0 | 0.0 | 2.0 | 0.0 | 4.0 | 1.0 | 10.0 | 6.0 |
| 1 | 60895 | 62393 | Bind | 6903 | 6020 | Booster Seat Gaming | Pho Real | Pho Real | 2 | 13 | ... | 1.0 | 2.0 | 2.0 | 2.0 | 0.0 | 0.0 | 4.0 | 3.0 | 9.0 | 8.0 |
| 2 | 60896 | 62393 | Haven | 6903 | 6020 | Booster Seat Gaming | Pho Real | Booster Seat Gaming | 13 | 8 | ... | 9.0 | 1.0 | 2.0 | 1.0 | 2.0 | 0.0 | 6.0 | 2.0 | 11.0 | 5.0 |
| 3 | 60924 | 62403 | Icebox | 7046 | 7047 | Bjor's Kittens | Mugiwara | Bjor's Kittens | 13 | 6 | ... | 8.0 | 0.0 | 4.0 | 0.0 | 1.0 | 0.0 | 2.0 | 1.0 | 12.0 | 5.0 |
| 4 | 60925 | 62403 | Haven | 7046 | 7047 | Bjor's Kittens | Mugiwara | Bjor's Kittens | 13 | 9 | ... | 11.0 | 1.0 | 3.0 | 2.0 | 3.0 | 0.0 | 4.0 | 3.0 | 12.0 | 4.0 |
5 rows × 36 columns
df_games.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 15888 entries, 0 to 15887 Data columns (total 36 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 GameID 15888 non-null object 1 MatchID 15888 non-null object 2 Map 15888 non-null object 3 Team1ID 15888 non-null int64 4 Team2ID 15888 non-null int64 5 Team1 15888 non-null object 6 Team2 15888 non-null object 7 Winner 15888 non-null object 8 Team1_TotalRounds 15888 non-null int64 9 Team2_TotalRounds 15888 non-null int64 10 Team1_SideFirstHalf 15888 non-null object 11 Team2_SideFirstHalf 15888 non-null object 12 Team1_RoundsFirstHalf 15888 non-null int64 13 Team1_RoundsSecondtHalf 15888 non-null int64 14 Team1_RoundsOT 15888 non-null int64 15 Team2_RoundsFirstHalf 15888 non-null int64 16 Team2_RoundsSecondtHalf 15888 non-null int64 17 Team2_RoundsOT 15888 non-null int64 18 Team1_PistolWon 14854 non-null float64 19 Team1_Eco 14854 non-null float64 20 Team1_EcoWon 14854 non-null float64 21 Team1_SemiEco 14854 non-null float64 22 Team1_SemiEcoWon 14854 non-null float64 23 Team1_SemiBuy 14854 non-null float64 24 Team1_SemiBuyWon 14854 non-null float64 25 Team1_FullBuy 14854 non-null float64 26 Team1_FullBuyWon 14854 non-null float64 27 Team2_PistolWon 14854 non-null float64 28 Team2_Eco 14854 non-null float64 29 Team2_EcoWon 14854 non-null float64 30 Team2_SemiEco 14854 non-null float64 31 Team2_SemiEcoWon 14854 non-null float64 32 Team2_SemiBuy 14854 non-null float64 33 Team2_SemiBuyWon 14854 non-null float64 34 Team2_FullBuy 14854 non-null float64 35 Team2_FullBuyWon 14854 non-null float64 dtypes: float64(18), int64(10), object(8) memory usage: 4.4+ MB
df_matches.isnull().sum().sum()
475
display(df_scoreboard.describe().round(2))
| ACS | Kills | Deaths | Assists | PlusMinus | KAST_Percent | ADR | HS_Percent | FirstKills | FirstDeaths | ... | Num_4Ks | Num_5Ks | OnevOne | OnevTwo | OnevThree | OnevFour | OnevFive | Econ | Plants | Defuses | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 157409.00 | 157449.00 | 157449.00 | 157449.00 | 156186.00 | 3367.00 | 149064.00 | 148467.00 | 157409.00 | 148474.00 | ... | 147744.00 | 147744.00 | 147744.00 | 147744.00 | 147744.00 | 147744.00 | 147744.00 | 147744.00 | 147744.00 | 147744.00 |
| mean | 201.13 | 14.37 | 14.38 | 5.15 | -0.01 | 0.70 | 130.69 | 0.24 | 2.03 | 2.05 | ... | 0.17 | 0.02 | 0.20 | 0.10 | 0.03 | 0.00 | 0.00 | 53.76 | 1.36 | 0.41 |
| std | 65.09 | 5.62 | 4.06 | 3.11 | 6.33 | 0.13 | 39.67 | 0.09 | 1.74 | 1.61 | ... | 0.42 | 0.14 | 0.45 | 0.32 | 0.16 | 0.07 | 0.02 | 18.90 | 1.66 | 0.66 |
| min | 0.00 | 0.00 | 0.00 | 0.00 | -20.00 | 0.14 | 0.00 | 0.00 | 0.00 | 0.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | -54.00 | 0.00 | 0.00 |
| 25% | 158.00 | 10.00 | 12.00 | 3.00 | -5.00 | 0.62 | 103.00 | 0.17 | 1.00 | 1.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 41.00 | 0.00 | 0.00 |
| 50% | 197.00 | 14.00 | 15.00 | 5.00 | 0.00 | 0.71 | 128.00 | 0.23 | 2.00 | 2.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 51.00 | 1.00 | 0.00 |
| 75% | 241.00 | 18.00 | 17.00 | 7.00 | 4.00 | 0.79 | 155.00 | 0.29 | 3.00 | 3.00 | ... | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 | 64.00 | 2.00 | 1.00 |
| max | 637.00 | 56.00 | 38.00 | 74.00 | 30.00 | 1.00 | 405.00 | 1.00 | 17.00 | 13.00 | ... | 6.00 | 2.00 | 5.00 | 4.00 | 3.00 | 1.00 | 1.00 | 566.00 | 15.00 | 6.00 |
8 rows × 23 columns
warnings.filterwarnings('ignore')
# Plot the distribution of three scoreboard stats, one call after another;
# `sns_plot` ends up holding the result of the last call (Plants).
for _stat in ("Kills", "Assists", "Plants"):
    sns_plot = sns.distplot(df_scoreboard[_stat])
df_scoreboard.head()
| GameID | PlayerID | PlayerName | TeamAbbreviation | Agent | ACS | Kills | Deaths | Assists | PlusMinus | ... | Num_4Ks | Num_5Ks | OnevOne | OnevTwo | OnevThree | OnevFour | OnevFive | Econ | Plants | Defuses | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60894 | 8419 | Reduxx | Boos | jett | 313.0 | 24.0 | 10.0 | 3.0 | 14.0 | ... | 2.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 74.0 | 0.0 | 0.0 |
| 1 | 60894 | 466 | ChurmZ | Boos | chamber | 227.0 | 16.0 | 10.0 | 7.0 | 6.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 67.0 | 2.0 | 0.0 |
| 2 | 60894 | 3712 | diaamond | Boos | sova | 226.0 | 17.0 | 9.0 | 8.0 | 8.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 58.0 | 3.0 | 0.0 |
| 3 | 60894 | 5099 | Boltzy | Boos | viper | 218.0 | 17.0 | 12.0 | 2.0 | 5.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 48.0 | 0.0 | 0.0 |
| 4 | 60894 | 3983 | Virtyy | Boos | skye | 80.0 | 5.0 | 13.0 | 3.0 | -8.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 21.0 | 0.0 | 0.0 |
5 rows × 28 columns
# One row per stat: box plot on the left, distribution plot on the right.
_grid_stats = ["Kills", "Deaths", "Assists", "Plants", "Defuses"]
fig, ax = plt.subplots(5, 2, figsize = (15, 13))
for _row, _stat in enumerate(_grid_stats):
    sns.boxplot(x=df_scoreboard[_stat], ax=ax[_row, 0])
    sns.distplot(df_scoreboard[_stat], ax=ax[_row, 1])
plt.tight_layout()
# dropna() with its default arguments keeps only the rows that have no missing
# value in any column; df_scoreboard itself is left unchanged.
df_scoreboard_cleaned = df_scoreboard.dropna()
df_scoreboard_cleaned
| GameID | PlayerID | PlayerName | TeamAbbreviation | Agent | ACS | Kills | Deaths | Assists | PlusMinus | ... | Num_4Ks | Num_5Ks | OnevOne | OnevTwo | OnevThree | OnevFour | OnevFive | Econ | Plants | Defuses | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60894 | 8419 | Reduxx | Boos | jett | 313.0 | 24.0 | 10.0 | 3.0 | 14.0 | ... | 2.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 74.0 | 0.0 | 0.0 |
| 1 | 60894 | 466 | ChurmZ | Boos | chamber | 227.0 | 16.0 | 10.0 | 7.0 | 6.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 67.0 | 2.0 | 0.0 |
| 2 | 60894 | 3712 | diaamond | Boos | sova | 226.0 | 17.0 | 9.0 | 8.0 | 8.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 58.0 | 3.0 | 0.0 |
| 3 | 60894 | 5099 | Boltzy | Boos | viper | 218.0 | 17.0 | 12.0 | 2.0 | 5.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 48.0 | 0.0 | 0.0 |
| 4 | 60894 | 3983 | Virtyy | Boos | skye | 80.0 | 5.0 | 13.0 | 3.0 | -8.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 21.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 21883 | 53281 | 2126 | Shawn | GEN | sage | 196.0 | 12.0 | 13.0 | 3.0 | -1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 43.0 | 3.0 | 0.0 |
| 21884 | 53281 | 4927 | NaturE | GEN | jett | 149.0 | 10.0 | 13.0 | 1.0 | -3.0 | ... | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 39.0 | 0.0 | 1.0 |
| 21885 | 53281 | 156 | Temperature | GEN | sova | 123.0 | 7.0 | 12.0 | 3.0 | -5.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 44.0 | 0.0 | 1.0 |
| 21886 | 53281 | 64 | gMd | GEN | omen | 121.0 | 6.0 | 16.0 | 4.0 | -10.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 32.0 | 0.0 | 0.0 |
| 21887 | 53281 | 8716 | koosta | GEN | viper | 101.0 | 5.0 | 14.0 | 7.0 | -9.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 35.0 | 0.0 | 0.0 |
3367 rows × 28 columns
# Scatter plot of kills against deaths, one point per scoreboard row.
# The title and axis labels name the columns that are actually plotted.
plt.scatter(df_scoreboard["Kills"], df_scoreboard["Deaths"])
plt.title('Kills vs. Deaths')
plt.xlabel('Kills')
plt.ylabel('Deaths')
Text(0, 0.5, 'Exam Score')
# Scatter plot of kills against assists, one point per scoreboard row
plt.scatter(df_scoreboard["Kills"], df_scoreboard["Assists"])
plt.title('Kills vs. Assists')
plt.xlabel('Kills')
plt.ylabel('Assists')
Text(0, 0.5, 'Assists')
from fitter import Fitter, get_common_distributions, get_distributions

# Kill counts as a NumPy array with the missing (NaN) entries masked out.
Kills = df_scoreboard["Kills"].to_numpy()
Kills = Kills[~np.isnan(Kills)]

# Fit each candidate distribution to the kill counts and show the summary
# table comparing the fits.
_candidate_distributions = ['gamma', 'lognorm', "beta", "burr", "norm"]
f = Fitter(Kills, distributions=_candidate_distributions)
f.fit()
f.summary()
| sumsquare_error | aic | bic | kl_div | |
|---|---|---|---|---|
| lognorm | 0.071433 | 1576.168643 | -2.299640e+06 | inf |
| beta | 0.071440 | 1600.478699 | -2.299614e+06 | inf |
| gamma | 0.071440 | 1598.667604 | -2.299626e+06 | inf |
| norm | 0.071851 | 1947.547679 | -2.298734e+06 | inf |
| burr | 0.073985 | 1130.418124 | -2.294103e+06 | inf |
from distfit import distfit
# Initialize model
dist = distfit()
# Find best theoretical distribution for empirical data X
dist.fit_transform(Kills)
dist.plot()
[distfit] >fit.. [distfit] >transform.. [distfit] >[norm ] [0.00 sec] [RSS: 0.0039462] [loc=14.368 scale=5.623] [distfit] >[expon ] [0.00 sec] [RSS: 0.0309937] [loc=0.000 scale=14.368] [distfit] >[pareto ] [2.15 sec] [RSS: 0.0644799] [loc=-2.696 scale=2.696] [distfit] >[dweibull ] [1.19 sec] [RSS: 0.0041518] [loc=13.608 scale=4.840] [distfit] >[t ] [2.80 sec] [RSS: 0.0040067] [loc=14.322 scale=5.416] [distfit] >[genextreme] [5.77 sec] [RSS: 0.0039802] [loc=11.953 scale=5.301] [distfit] >[gamma ] [1.04 sec] [RSS: 0.0037878] [loc=-31.107 scale=0.694] [distfit] >[lognorm ] [5.98 sec] [RSS: 0.0037917] [loc=-52.337 scale=66.470] [distfit] >[beta ] [3.61 sec] [RSS: 0.0037878] [loc=-31.072 scale=11023959.598] [distfit] >[uniform ] [0.00 sec] [RSS: 0.0332003] [loc=0.000 scale=56.000] [distfit] >[loggamma ] [2.24 sec] [RSS: 0.0039749] [loc=-1368.749 scale=195.088] [distfit] >Compute confidence interval [parametric] [distfit] >plot..
(<Figure size 720x576 with 1 Axes>,
<AxesSubplot:title={'center':'\nbeta\na=65.38, b=15860599.41, loc=-31.07, scale=11023959.60'}, xlabel='Values', ylabel='Frequency'>)
df_scoreboard
| GameID | PlayerID | PlayerName | TeamAbbreviation | Agent | ACS | Kills | Deaths | Assists | PlusMinus | ... | Num_4Ks | Num_5Ks | OnevOne | OnevTwo | OnevThree | OnevFour | OnevFive | Econ | Plants | Defuses | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 60894 | 8419 | Reduxx | Boos | jett | 313.0 | 24.0 | 10.0 | 3.0 | 14.0 | ... | 2.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 74.0 | 0.0 | 0.0 |
| 1 | 60894 | 466 | ChurmZ | Boos | chamber | 227.0 | 16.0 | 10.0 | 7.0 | 6.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 67.0 | 2.0 | 0.0 |
| 2 | 60894 | 3712 | diaamond | Boos | sova | 226.0 | 17.0 | 9.0 | 8.0 | 8.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 58.0 | 3.0 | 0.0 |
| 3 | 60894 | 5099 | Boltzy | Boos | viper | 218.0 | 17.0 | 12.0 | 2.0 | 5.0 | ... | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 48.0 | 0.0 | 0.0 |
| 4 | 60894 | 3983 | Virtyy | Boos | skye | 80.0 | 5.0 | 13.0 | 3.0 | -8.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 21.0 | 0.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 157934 | 13 | 24 | Gover | 0.0 | 0.0 | 0.0 | 0.0 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||
| 157935 | 13 | 25 | Jack1 | 0.0 | 0.0 | 0.0 | 0.0 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||
| 157936 | 13 | 26 | Rewind | 0.0 | 0.0 | 0.0 | 0.0 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||
| 157937 | 13 | 27 | Woo1y | 0.0 | 0.0 | 0.0 | 0.0 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ||
| 157938 | 13 | 28 | DrasseL | 0.0 | 0.0 | 0.0 | 0.0 | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
157939 rows × 28 columns
len(df_agents)
18
# Per-agent averages of the numeric scoreboard stats, highest average ACS first.
# numeric_only=True restricts the mean to numeric columns: on pandas >= 2.0 a
# plain .mean() raises TypeError when the frame also holds text columns
# (e.g. PlayerName), while older versions silently dropped them.
df_agents = (
    df_scoreboard.groupby("Agent")
    .mean(numeric_only=True)
    .sort_values(by='ACS', ascending=False)
)
df_agents.head()
| ACS | Kills | Deaths | Assists | PlusMinus | KAST_Percent | ADR | HS_Percent | FirstKills | FirstDeaths | ... | Num_4Ks | Num_5Ks | OnevOne | OnevTwo | OnevThree | OnevFour | OnevFive | Econ | Plants | Defuses | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Agent | |||||||||||||||||||||
| raze | 235.210910 | 16.141252 | 15.286146 | 4.715481 | 0.855106 | 0.686279 | 150.846141 | 0.180850 | 2.853556 | 2.775425 | ... | 0.226162 | 0.029545 | 0.151125 | 0.080460 | 0.016675 | 0.002914 | 0.000486 | 59.121661 | 0.591145 | 0.427311 |
| reyna | 230.294592 | 16.426312 | 15.196395 | 4.201929 | 1.229918 | 0.690932 | 146.345062 | 0.259528 | 2.904016 | 2.838500 | ... | 0.274841 | 0.040169 | 0.167995 | 0.081965 | 0.021142 | 0.004228 | 0.000163 | 58.550171 | 0.490649 | 0.287689 |
| jett | 230.045706 | 16.582086 | 15.028903 | 3.306768 | 1.553183 | 0.682714 | 141.720387 | 0.229524 | 3.656197 | 3.025411 | ... | 0.262311 | 0.034978 | 0.166248 | 0.082346 | 0.020850 | 0.003612 | 0.000366 | 58.974258 | 0.496502 | 0.344337 |
| phoenix | 221.685185 | 15.572784 | 15.042814 | 4.794029 | 0.529970 | 0.630000 | 140.427653 | 0.252350 | 2.768750 | 2.689311 | ... | 0.216443 | 0.027150 | 0.156306 | 0.081198 | 0.019792 | 0.003552 | 0.000000 | 56.124841 | 0.747780 | 0.485156 |
| yoru | 213.034483 | 15.088670 | 15.487685 | 4.339901 | -0.399015 | NaN | 135.788177 | 0.250493 | 2.743842 | 2.817734 | ... | 0.246305 | 0.024631 | 0.187192 | 0.064039 | 0.024631 | 0.000000 | 0.000000 | 52.837438 | 0.748768 | 0.285714 |
5 rows × 23 columns
#df_scoreboard.groupby(by="Agent").sum().sort_values(by='Kills', ascending=False)
def return_sorted2(df_new, col_name):
    """Return one column of ``df_new`` as chart-ready data, largest value first.

    Args:
        df_new: DataFrame whose index holds the labels to plot (the callers
            pass the per-agent averages, indexed by agent name).
        col_name: Name of the column to sort by and to return.

    Returns:
        A dict with two equally long lists: ``'Agent'`` holds the index
        labels and ``col_name`` holds the matching values, both ordered by
        descending ``col_name``. ``df_new`` itself is not modified.
    """
    sorted_df = df_new.sort_values(by=col_name, ascending=False)
    return {'Agent': sorted_df.index.to_list(), col_name: sorted_df[col_name].to_list()}
# One bar chart per stat, agents ordered from highest to lowest average.
# Every chart takes its x labels from the 'Agent' list of the SAME dict that
# supplies its y values. Reusing another chart's agent order (as the
# FirstDeaths and Plants charts previously did with Deaths_dict) would attach
# the bars to the wrong agents, because each dict is sorted by its own column.
ACS_dict = return_sorted2(df_agents, 'ACS')
fig_ACS = px.bar(ACS_dict, x='Agent', y='ACS', title='Average number of ACS by Agent')
fig_ACS.show()

kills_dict = return_sorted2(df_agents, 'Kills')
fig_kills = px.bar(kills_dict, x='Agent', y='Kills', title='Average number of Kills by Agent')
fig_kills.show()

Assists_dict = return_sorted2(df_agents, 'Assists')
fig_Assists = px.bar(Assists_dict, x='Agent', y='Assists', title='Average number of Assists by Agent')
fig_Assists.show()

Deaths_dict = return_sorted2(df_agents, 'Deaths')
fig_Deaths = px.bar(Deaths_dict, x='Agent', y='Deaths', title='Average number of Deaths by Agent')
fig_Deaths.show()

FirstDeaths_dict = return_sorted2(df_agents, 'FirstDeaths')
fig_FirstDeaths = px.bar(FirstDeaths_dict, x='Agent', y='FirstDeaths', title='Average number of FirstDeaths by Agent')
fig_FirstDeaths.show()

Plants_dict = return_sorted2(df_agents, 'Plants')
fig_Plants = px.bar(Plants_dict, x='Agent', y='Plants', title='Average number of Plants by Agent')
fig_Plants.show()